Pet breed detection with ResNet50¶

Data import and discovery¶

Library and data import¶

In [4]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50

import numpy as np
from numpy import round, sqrt, random
import matplotlib.pyplot as plt



dataset, info = tfds.load('oxford_iiit_pet',split=['train[:80%]', 'train[80%:]','test'], as_supervised=True, with_info=True)
train_set_raw, valid_set_raw, test_set_raw = dataset

# Create a dictionary of numerical label - breed key-value pairs
labels = info.features['label'].names                       # Name of the breed
label_dict = {i: breed for i,breed in enumerate(labels)}    # Dict comprehension to create key-value pairs of number-breed name
print(label_dict)

# Find out about the pixel value range
for image,_ in train_set_raw.take(1):
    print(f"Pixel value range: [{tf.reduce_min(image).numpy()}, {tf.reduce_max(image).numpy()}]")

# Number of classes
num_classes = info.features['label'].num_classes
print(f"There are {num_classes} classes of dogs and cats in this dataset")
{0: 'Abyssinian', 1: 'american_bulldog', 2: 'american_pit_bull_terrier', 3: 'basset_hound', 4: 'beagle', 5: 'Bengal', 6: 'Birman', 7: 'Bombay', 8: 'boxer', 9: 'British_Shorthair', 10: 'chihuahua', 11: 'Egyptian_Mau', 12: 'english_cocker_spaniel', 13: 'english_setter', 14: 'german_shorthaired', 15: 'great_pyrenees', 16: 'havanese', 17: 'japanese_chin', 18: 'keeshond', 19: 'leonberger', 20: 'Maine_Coon', 21: 'miniature_pinscher', 22: 'newfoundland', 23: 'Persian', 24: 'pomeranian', 25: 'pug', 26: 'Ragdoll', 27: 'Russian_Blue', 28: 'saint_bernard', 29: 'samoyed', 30: 'scottish_terrier', 31: 'shiba_inu', 32: 'Siamese', 33: 'Sphynx', 34: 'staffordshire_bull_terrier', 35: 'wheaten_terrier', 36: 'yorkshire_terrier'}
Pixel value range: [0, 255]
There are 37 classes of dogs and cats in this dataset

Input data visualization¶

In this section, I will visualize the images and labels from the dataset

In [5]:
num_samples = 9
num_rows = int(round(sqrt(num_samples))); num_cols = int(num_samples/num_rows)
index = 1
plt.figure(figsize=(num_rows*3,num_cols*3))

# Display some images from the raw train set
for image, label in train_set_raw.take(num_samples):
    # print(image)
    plt.subplot(num_rows,num_cols,index)
    plt.imshow(image.numpy().astype("uint8"))
    plt.title(f"Label: {label_dict[label.numpy()]}")
    plt.axis("off")
    index += 1
plt.show()
plt.tight_layout
No description has been provided for this image
Out[5]:
<function matplotlib.pyplot.tight_layout(*, pad: 'float' = 1.08, h_pad: 'float | None' = None, w_pad: 'float | None' = None, rect: 'tuple[float, float, float, float] | None' = None) -> 'None'>

Examine the image dimensions¶

As can be seen from the visualization above, each iamge has a unique size. I ahve created the function below to determine the minimum height and width resolution of the images in the train, validation, and test set

In [6]:
def find_min_resolution(dataset):

    min_height = float('inf')
    min_width = float('inf')
    for image, _ in dataset:
        height,width,_ = image.shape

        min_height = min(min_height,height)
        min_width = min(min_width,width)

    print(f"The minimum height resolution: {min_height}\n"
          f"The minimum width resolution: {min_width}")
    
find_min_resolution(train_set_raw)
find_min_resolution(valid_set_raw)
find_min_resolution(test_set_raw)
The minimum height resolution: 108
The minimum width resolution: 114
The minimum height resolution: 112
The minimum width resolution: 150
The minimum height resolution: 103
The minimum width resolution: 137

Image resizing¶

So the minimum height resolution is 103 and the minimum width resolution is 114. I will resize the images to have dimension of (224,224) for the pretrained pre-trained ResNet50 network. Normally, I would include the iamge preprocessing step inside the final model. However, given the variety of image dimensions, I will have to resize the images first

In [7]:
def preprocess_image(image,target_size = (96, 96),
                     display=False,
                     pad=True):
    '''This function resize an image while keeping the aspect ratio such that the shorter side is 96 pixels (by default)'''
    
    if pad:     # Resize with padding (shrink the image while preserving aspect ratio and fill void with black)
        image = tf.image.resize_with_pad(image, target_size[0],target_size[1])
    else:       # Resize without padding - stretch or shrink the image to the desired target size 
        image = tf.image.resize(image, target_size)

    return image

def preprocess_dataset(dataset, target_size = (96,96),display=False,pad=True):
    '''This function applies the preprocess_image() on the images in a dataset to resize the iamges to the target_size (default 96x96)'''
    return dataset.map(lambda image, label: (preprocess_image(image,target_size,display,pad), label))

# The desired size for the processed images
TARGET_SIZE = (224,224)
train_set_processed = preprocess_dataset(train_set_raw,target_size=TARGET_SIZE)
valid_set_processed = preprocess_dataset(valid_set_raw,target_size=TARGET_SIZE)
test_set_processed = preprocess_dataset(test_set_raw,target_size=TARGET_SIZE)

train_set_processed_display = preprocess_dataset(train_set_raw,target_size=TARGET_SIZE,display=True)

find_min_resolution(train_set_processed_display)
The minimum height resolution: 224
The minimum width resolution: 224
In [8]:
num_samples = 9
num_rows = int(round(sqrt(num_samples))); num_cols = int(num_samples/num_rows)
index = 1
plt.figure(figsize=(num_rows*3,num_cols*3))

# Display some images from the raw train set
for image, label in train_set_processed_display.take(num_samples):
    # # Examine the range of pixel value
    # print("Pixel value range:", tf.reduce_min(image).numpy(), "to", tf.reduce_max(image).numpy())
    
    plt.subplot(num_rows,num_cols,index)
    plt.imshow(image.numpy().astype("uint8"))
    plt.title(f"Label: {label_dict[label.numpy()]}")
    plt.axis("off")
    index += 1
plt.show()
plt.tight_layout
No description has been provided for this image
Out[8]:
<function matplotlib.pyplot.tight_layout(*, pad: 'float' = 1.08, h_pad: 'float | None' = None, w_pad: 'float | None' = None, rect: 'tuple[float, float, float, float] | None' = None) -> 'None'>

Dataset => Array Block¶

In this code block, I define a function to extract the image and label data from any of the three datasets. This make it easier to work with the data in the form of numpy arrays instead of PrefetchDataset objects when importing the dataset from tensorflow.

Since the dataset has images of different dimensions, this function will also resize the images to a

In [9]:
def get_nparray_dataset(dataset):
    image_list = []
    label_list = []

    for image,label in dataset:
        image_list.append(image.numpy())
        label_list.append(label.numpy())

    image_list_npararay = np.array(image_list)
    label_list_nparray = np.array(label_list)
    
    return image_list_npararay, label_list_nparray
In [10]:
train_image_array, train_label_array = get_nparray_dataset(train_set_processed)
valid_image_array, valid_label_array = get_nparray_dataset(valid_set_processed)
test_image_array, test_label_array = get_nparray_dataset(test_set_processed)

print(train_image_array.shape, train_label_array.shape)
print(valid_image_array.shape, valid_label_array.shape)
print(test_image_array.shape, test_label_array.shape)

num_train = len(train_label_array)
num_valid = len(valid_label_array)
num_test = len(test_image_array)
(2944, 224, 224, 3) (2944,)
(736, 224, 224, 3) (736,)
(3669, 224, 224, 3) (3669,)

ResNet50-based model¶

Base model construction¶

In [11]:
base_model = ResNet50(weights='imagenet',
                    #   input_shape=TARGET_SIZE+(3,),
                      include_top=False)

# Freeze the weights of the model
base_model.trainable = False

# # Investigate the structure of the base model and make sure that the weights are frozen
# base_model.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94765736/94765736 ━━━━━━━━━━━━━━━━━━━━ 4s 0us/step

Full model architecture¶

In [12]:
model_resnet = models.Sequential([
    layers.Input(shape=TARGET_SIZE + (3,)),
    layers.Lambda(preprocess_input),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')  # Output layer for pet breeds
])

initial_weights = model_resnet.get_weights()

model_resnet.summary()
WARNING:tensorflow:From d:\Minh Nguyen\TME_6015\.venv\Lib\site-packages\keras\src\backend\tensorflow\core.py:204: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

WARNING:tensorflow:From d:\Minh Nguyen\TME_6015\.venv\Lib\site-packages\keras\src\backend\tensorflow\core.py:204: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lambda (Lambda)                 │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ resnet50 (Functional)           │ (None, 7, 7, 2048)     │    23,587,712 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 2048)           │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 2048)           │         8,192 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 2048)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 37)             │        75,813 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 37)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 37)             │         1,406 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 23,673,123 (90.31 MB)
 Trainable params: 81,315 (317.64 KB)
 Non-trainable params: 23,591,808 (90.00 MB)

Model Training¶

In [13]:
model_resnet.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
              loss=tf.keras.losses.SparseCategoricalCrossentropy,
              metrics=["accuracy"])
loss0, acc0 = model_resnet.evaluate(test_image_array,test_label_array)

print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(acc0))
115/115 ━━━━━━━━━━━━━━━━━━━━ 75s 633ms/step - accuracy: 0.0226 - loss: 4.0566
initial loss: 4.06
initial accuracy: 0.02
In [14]:
# Train the model with the base layers frozen
initial_epochs = 10
model_resnet.set_weights(initial_weights)
history_resnet = model_resnet.fit(train_image_array,train_label_array, 
                                  validation_data=(valid_image_array,valid_label_array), 
                                  epochs=initial_epochs)
Epoch 1/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 82s 841ms/step - accuracy: 0.2092 - loss: 3.7377 - val_accuracy: 0.7568 - val_loss: 1.1189
Epoch 2/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 79s 855ms/step - accuracy: 0.5072 - loss: 1.6242 - val_accuracy: 0.7948 - val_loss: 0.7152
Epoch 3/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 75s 816ms/step - accuracy: 0.6108 - loss: 1.2929 - val_accuracy: 0.8302 - val_loss: 0.5736
Epoch 4/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 75s 821ms/step - accuracy: 0.6185 - loss: 1.2443 - val_accuracy: 0.8302 - val_loss: 0.5649
Epoch 5/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 74s 807ms/step - accuracy: 0.6419 - loss: 1.1471 - val_accuracy: 0.8410 - val_loss: 0.5265
Epoch 6/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 67s 730ms/step - accuracy: 0.6522 - loss: 1.1500 - val_accuracy: 0.8628 - val_loss: 0.5085
Epoch 7/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 65s 703ms/step - accuracy: 0.6528 - loss: 1.0717 - val_accuracy: 0.8370 - val_loss: 0.5180
Epoch 8/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 68s 740ms/step - accuracy: 0.6567 - loss: 1.0980 - val_accuracy: 0.8370 - val_loss: 0.5162
Epoch 9/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 73s 795ms/step - accuracy: 0.6616 - loss: 1.0319 - val_accuracy: 0.8641 - val_loss: 0.4534
Epoch 10/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 71s 776ms/step - accuracy: 0.6779 - loss: 1.0321 - val_accuracy: 0.8668 - val_loss: 0.4673
In [15]:
def plot_performance(history, learning_rate=None, batch_size=None, finetune_epochs=None):
  plt.figure(figsize=(10,5))

  # history_data = history.history

  # Determine whether history is keras history or a dictionary to appropriately extract the history data
  if isinstance(history, keras.callbacks.History):
    history_data = history.history        # Extract the history dictionary
  else:
    history_data = history                # Assume it's already a dictionary

  # Accuracy of model training and validation vs training epoch
  plt.subplot(1,2,1)
  ylim_acc = [0, max(max(history_data['accuracy']),max(history_data['val_accuracy']))]
  plt.plot(history_data['accuracy'], label = 'Training accuracy')
  plt.plot(history_data['val_accuracy'], label = 'Validation accuracy')
  plt.ylim(ylim_acc)
  if finetune_epochs:
    plt.plot([finetune_epochs-1, finetune_epochs-1],plt.ylim(), label = 'Fine tuning')
  else:
    pass

  if learning_rate and batch_size:
    plt.title(f'Model accuracy \n lr = {learning_rate}, batch size = {batch_size}')
  else: plt.title('Model accuracy')

  plt.ylabel('Accuracy')
  plt.xlabel('Epoch')
  plt.legend(loc='lower right')

  # Loss during model training and validation
  plt.subplot(1,2,2)
  ylim_loss = [0, max(max(history_data['loss']),max(history_data['val_loss']))]
  # print(len(history_data['loss']))
  plt.plot(history_data['loss'], label = 'Training loss')
  plt.plot(history_data['val_loss'], label = 'Validation loss')
  plt.ylim(ylim_loss)
  if finetune_epochs:
    plt.plot([finetune_epochs-1, finetune_epochs-1],plt.ylim(), label = 'Fine tuning')
  else:
    pass

  if learning_rate and batch_size:
    plt.title(f'Model loss \n lr = {learning_rate}, batch size = {batch_size}')
  else: plt.title('Model loss')
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(loc='lower right')
  plt.show()

  print(f"The model has a training accuracy of {history_data['accuracy'][-1]*100:.2f}%\n"
      f"The model has a validation accuracy of {history_data['val_accuracy'][-1]*100:.2f}%")
  return
In [16]:
plot_performance(history_resnet)
No description has been provided for this image
The model has a training accuracy of 66.61%
The model has a validation accuracy of 86.68%

Model Evaluation¶

In [17]:
test_loss, test_acc = model_resnet.evaluate(test_image_array,test_label_array)
print(f"Test accuracy: {test_acc}\n"
      f"Test loss: {test_loss}")
115/115 ━━━━━━━━━━━━━━━━━━━━ 80s 697ms/step - accuracy: 0.8395 - loss: 0.5133
Test accuracy: 0.8356500267982483
Test loss: 0.5288707613945007
In [18]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

prediction_array = np.argmax(model_resnet.predict(test_image_array), axis=1)
115/115 ━━━━━━━━━━━━━━━━━━━━ 67s 572ms/step
In [19]:
cm = confusion_matrix(test_label_array, prediction_array)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)  # You can change the color map as desired
fig = disp.ax_.get_figure() 
fig.set_figwidth(12); fig.set_figheight(10) 
plt.title("Confusion Matrix - ResNet")
plt.xticks(rotation=90, ha='right')  # Rotate x labels for better readability
plt.yticks(rotation=0)  # Keep y labels horizontal
plt.tight_layout()  # Adjust layout to make room for rotated labels
plt.show()

label_dict
No description has been provided for this image
Out[19]:
{0: 'Abyssinian',
 1: 'american_bulldog',
 2: 'american_pit_bull_terrier',
 3: 'basset_hound',
 4: 'beagle',
 5: 'Bengal',
 6: 'Birman',
 7: 'Bombay',
 8: 'boxer',
 9: 'British_Shorthair',
 10: 'chihuahua',
 11: 'Egyptian_Mau',
 12: 'english_cocker_spaniel',
 13: 'english_setter',
 14: 'german_shorthaired',
 15: 'great_pyrenees',
 16: 'havanese',
 17: 'japanese_chin',
 18: 'keeshond',
 19: 'leonberger',
 20: 'Maine_Coon',
 21: 'miniature_pinscher',
 22: 'newfoundland',
 23: 'Persian',
 24: 'pomeranian',
 25: 'pug',
 26: 'Ragdoll',
 27: 'Russian_Blue',
 28: 'saint_bernard',
 29: 'samoyed',
 30: 'scottish_terrier',
 31: 'shiba_inu',
 32: 'Siamese',
 33: 'Sphynx',
 34: 'staffordshire_bull_terrier',
 35: 'wheaten_terrier',
 36: 'yorkshire_terrier'}

Model Prediction and Visualization¶

In [20]:
# Sample random images and their indices
num_samples = 9                                                                             # number of samples to be display
num_rows = int(round(sqrt(num_samples))); num_cols = int(num_samples/num_rows)      # number of rows and columns for the subplot
rand = random.randint(num_test,size = (num_samples))                                       # random index for choosing the samples in the dataset

image_test_rand_array = test_image_array[rand]
label_test_rand_array = test_label_array[rand]
prediction_rand_array = np.argmax(model_resnet.predict(image_test_rand_array),axis=1)

plt.figure(figsize=(num_rows*3,num_cols*3))
# fig, axes1 = plt.subplots(num_rows,num_cols,figsize=(num_rows*2,num_cols2))

for i in range(num_rows):
    for j in range(num_cols):
        index = i * num_cols + j
        plt.subplot(num_rows,num_cols,index+1)
        image = image_test_rand_array[index]/255.0  # Extract the image
        label = label_test_rand_array[index]  # Extract the label
        prediction = prediction_rand_array[index]

        # Original pictures (no augmentation layer applied)
        plt.axis("off")
        # Display the image
        plt.imshow(image)
        plt.title(f"Label: {label_dict[label]}\n"
                  f"Predict: {label_dict[prediction]}",
                  fontsize = 8) 

plt.tight_layout()
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 205ms/step
No description has been provided for this image

InceptionV3-based model¶

In [21]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocess

Since the inception model use similar input image shapes as ResNet, I will reuse the processed train, validation, and test sets

Base Model Construction¶

In [22]:
base_model = InceptionV3(weights='imagenet',
                         include_top=False)

# Freeze the weights of the model
base_model.trainable = False

# # Investigate the structure of the base model and make sure that the weights are frozen
# base_model.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
87910968/87910968 ━━━━━━━━━━━━━━━━━━━━ 4s 0us/step

Full Model¶

In [23]:
model_inception = models.Sequential([
    layers.Input(TARGET_SIZE + (3,)),
    layers.Lambda(inception_preprocess),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='sigmoid')  # Use 1 for binary classification (dogs vs cats)
])

initial_weight_inception = model_inception.get_weights()

model_inception.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lambda_1 (Lambda)               │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ inception_v3 (Functional)       │ (None, 5, 5, 2048)     │    21,802,784 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d_1      │ (None, 2048)           │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_95          │ (None, 2048)           │         8,192 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ (None, 2048)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 37)             │        75,813 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_3 (Dropout)             │ (None, 37)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 37)             │         1,406 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 21,888,195 (83.50 MB)
 Trainable params: 81,315 (317.64 KB)
 Non-trainable params: 21,806,880 (83.19 MB)

Model Training¶

In [24]:
model_inception.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
                        loss=tf.keras.losses.SparseCategoricalCrossentropy,
                        metrics=["accuracy"])
loss0, acc0 = model_inception.evaluate(test_image_array,test_label_array)

print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(acc0))
115/115 ━━━━━━━━━━━━━━━━━━━━ 39s 322ms/step - accuracy: 0.0353 - loss: 3.6941
initial loss: 3.71
initial accuracy: 0.03
In [25]:
# Train the model with the base layers frozen
initial_epochs = 10
model_inception.set_weights(initial_weight_inception)
history_inception = model_inception.fit(train_image_array,train_label_array, 
                                        validation_data=(valid_image_array,valid_label_array),
                                        epochs=initial_epochs)
Epoch 1/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 43s 421ms/step - accuracy: 0.3133 - loss: 3.2217 - val_accuracy: 0.7364 - val_loss: 1.0708
Epoch 2/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 39s 420ms/step - accuracy: 0.6074 - loss: 1.3399 - val_accuracy: 0.7962 - val_loss: 0.6913
Epoch 3/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 39s 419ms/step - accuracy: 0.6215 - loss: 1.2163 - val_accuracy: 0.8071 - val_loss: 0.6210
Epoch 4/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 39s 427ms/step - accuracy: 0.6833 - loss: 1.0117 - val_accuracy: 0.8166 - val_loss: 0.6181
Epoch 5/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 41s 441ms/step - accuracy: 0.6617 - loss: 1.0924 - val_accuracy: 0.8179 - val_loss: 0.6321
Epoch 6/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 42s 456ms/step - accuracy: 0.6837 - loss: 1.0168 - val_accuracy: 0.7989 - val_loss: 0.6462
Epoch 7/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 41s 443ms/step - accuracy: 0.7060 - loss: 0.9732 - val_accuracy: 0.8098 - val_loss: 0.6751
Epoch 8/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 40s 440ms/step - accuracy: 0.6877 - loss: 0.9850 - val_accuracy: 0.8302 - val_loss: 0.5780
Epoch 9/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 39s 426ms/step - accuracy: 0.7074 - loss: 0.9350 - val_accuracy: 0.8438 - val_loss: 0.5615
Epoch 10/10
92/92 ━━━━━━━━━━━━━━━━━━━━ 39s 428ms/step - accuracy: 0.6872 - loss: 0.9724 - val_accuracy: 0.8478 - val_loss: 0.6378
In [26]:
plot_performance(history_inception)
No description has been provided for this image
The model has a training accuracy of 69.53%
The model has a validation accuracy of 84.78%

Model Evaluation¶

In [27]:
test_loss_inception, test_acc_inception = model_inception.evaluate(test_image_array,test_label_array)
print(f"Test accuracy: {test_acc}\n"
      f"Test loss: {test_loss}")

prediction_array_inception = np.argmax(model_inception.predict(test_image_array), axis=1)
115/115 ━━━━━━━━━━━━━━━━━━━━ 39s 335ms/step - accuracy: 0.8462 - loss: 0.5259
Test accuracy: 0.8356500267982483
Test loss: 0.5288707613945007
115/115 ━━━━━━━━━━━━━━━━━━━━ 41s 341ms/step
In [28]:
cm = confusion_matrix(test_label_array, prediction_array_inception)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)  # You can change the color map as desired
fig = disp.ax_.get_figure() 
fig.set_figwidth(12)
fig.set_figheight(10) 
plt.title("Confusion Matrix - Inception model")
plt.xticks(rotation=90, ha='right')  # Rotate x labels for better readability
plt.yticks(rotation=0)  # Keep y labels horizontal
plt.tight_layout()  # Adjust layout to make room for rotated labels
plt.show()

label_dict
No description has been provided for this image
Out[28]:
{0: 'Abyssinian',
 1: 'american_bulldog',
 2: 'american_pit_bull_terrier',
 3: 'basset_hound',
 4: 'beagle',
 5: 'Bengal',
 6: 'Birman',
 7: 'Bombay',
 8: 'boxer',
 9: 'British_Shorthair',
 10: 'chihuahua',
 11: 'Egyptian_Mau',
 12: 'english_cocker_spaniel',
 13: 'english_setter',
 14: 'german_shorthaired',
 15: 'great_pyrenees',
 16: 'havanese',
 17: 'japanese_chin',
 18: 'keeshond',
 19: 'leonberger',
 20: 'Maine_Coon',
 21: 'miniature_pinscher',
 22: 'newfoundland',
 23: 'Persian',
 24: 'pomeranian',
 25: 'pug',
 26: 'Ragdoll',
 27: 'Russian_Blue',
 28: 'saint_bernard',
 29: 'samoyed',
 30: 'scottish_terrier',
 31: 'shiba_inu',
 32: 'Siamese',
 33: 'Sphynx',
 34: 'staffordshire_bull_terrier',
 35: 'wheaten_terrier',
 36: 'yorkshire_terrier'}

Model Prediction and Visualization¶

In [29]:
# Sample random images and their indices
num_samples = 9                                                                             # number of samples to be display
num_rows = int(round(sqrt(num_samples))); num_cols = int(num_samples/num_rows)      # number of rows and columns for the subplot
rand = random.randint(num_test,size = (num_samples))                                       # random index for choosing the samples in the dataset

image_test_rand_array = test_image_array[rand]
label_test_rand_array = test_label_array[rand]
prediction_rand_array = np.argmax(model_inception.predict(image_test_rand_array),axis=1)

plt.figure(figsize=(num_rows*3,num_cols*3))

for i in range(num_rows):
    for j in range(num_cols):
        index = i * num_cols + j
        plt.subplot(num_rows,num_cols,index+1)
        image = image_test_rand_array[index]/255.0  # Extract the image
        label = label_test_rand_array[index]  # Extract the label
        prediction = prediction_rand_array[index]

        # Original pictures (no augmentation layer applied)
        plt.axis("off")
        # Display the image
        plt.imshow(image)
        plt.title(f"Label: {label_dict[label]}\n"
                  f"Predict: {label_dict[prediction]}",
                  fontsize = 8) 

plt.tight_layout()
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 125ms/step
No description has been provided for this image

ResNet vs Inception Comparison¶

Model Performance Comparison¶

In [36]:
test_loss_resnet, test_acc_resnet = model_resnet.evaluate(test_image_array,test_label_array)
test_loss_inception, test_acc_inception = model_inception.evaluate(test_image_array,test_label_array)

print(test_acc_resnet, test_loss_resnet)
print(test_acc_inception, test_loss_inception)
115/115 ━━━━━━━━━━━━━━━━━━━━ 61s 528ms/step - accuracy: 0.8395 - loss: 0.5133
115/115 ━━━━━━━━━━━━━━━━━━━━ 38s 334ms/step - accuracy: 0.8462 - loss: 0.5259
0.8356500267982483 0.5288707613945007
0.8419187664985657 0.5285324454307556
In [37]:
msg_loss = "comparable"; msg_acc = "comparable"
test_loss_diff = test_loss_resnet - test_loss_inception
test_acc_diff = test_acc_resnet - test_acc_inception
acc_diff_threshold = 0.01; loss_diff_threshold = 0.01

if np.abs(test_loss_diff) > acc_diff_threshold:
    if test_loss_resnet > test_loss_inception: msg_loss = "better"
    else: msg_loss = "worse"

if np.abs(test_acc_diff) > loss_diff_threshold:
    if test_acc_resnet > test_acc_inception: msg_acc = "better"
    else: msg_acc = "worse"

print(f"The ResNet-based model has {msg_acc} accuracy compared to the Inception-based model\n"
      f"Resnet-based model accuracy: {test_acc_resnet*100:.2f}%\n"
      f"Inception-based model accuracy: {test_acc_inception*100:.2f}%")

print(f"The ResNet-based model has {msg_loss} accuracy compared to the Inception-based model\n"
      f"Resnet-based model loss: {test_loss_resnet:.2f}\n"
      f"Inception-based model loss: {test_loss_inception:.2f}")
The ResNet-based model has comparable accuracy compared to the Inception-based model
Resnet-based model accuracy: 83.57%
Inception-based model accuracy: 84.19%
The ResNet-based model has comparable accuracy compared to the Inception-based model
Resnet-based model loss: 0.53
Inception-based model loss: 0.53

Prediction Comparison¶

In [38]:
# Sample random images and their indices
num_samples = 25                                                                             # number of samples to be display
num_rows = int(round(sqrt(num_samples))); num_cols = int(num_samples/num_rows)      # number of rows and columns for the subplot
rand = random.randint(num_test,size = (num_samples))                                       # random index for choosing the samples in the dataset

image_test_rand_array = test_image_array[rand]
label_test_rand_array = test_label_array[rand]
prediction_rand_array_resnet = np.argmax(model_resnet.predict(image_test_rand_array),axis=1)
prediction_rand_array_inception = np.argmax(model_inception.predict(image_test_rand_array),axis=1)

plt.figure(figsize=(num_rows*3,num_cols*3))
# fig, axes1 = plt.subplots(num_rows,num_cols,figsize=(num_rows*2,num_cols2))

for i in range(num_rows):
    for j in range(num_cols):
        index = i * num_cols + j
        plt.subplot(num_rows,num_cols,index+1)
        image = image_test_rand_array[index]/255.0      # Extract the image
        label = label_test_rand_array[index]            # Extract the label
        prediction_resnet = prediction_rand_array_resnet[index]
        prediction_inception = prediction_rand_array_inception[index]

        # Original pictures (no augmentation layer applied)
        plt.axis("off")
        # Display the image
        plt.imshow(image)
        plt.title(f"Label: {label_dict[label]}\n"
                  f"ResNet predicts: {label_dict[prediction_resnet]} \n"
                  f"Inception predicts: {label_dict[prediction_inception]}",
                  fontsize = 8) 

plt.tight_layout()
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 477ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 283ms/step
No description has been provided for this image